This notebook attempts to reproduce the figures in the FiveThirtyEight article “The Lasting Legacy of Redlining” originally authored by Ryan Best and Elena Mejía.
We obtain the project data from the FiveThirtyEight GitHub.
Use the tidycensus package to load shape files and 2020
census data.
This product uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.
# Cache shapefiles
options(tigris_use_cache = TRUE)
# Get race data with shape files
# TODO: Allow regeneration of these files using the driver script.
# TODO: Allow users to invoke census API
blocks_file <- "../data/blocks.rds"
if(!file.exists(blocks_file)){
blocks <- get_decennial(geography = "block",
state="Pennsylvania",
variables = c("P2_001N", # Total
"P2_002N", # Total Hispanic or Latino
"P2_005N", # Total Non-Hispanic White
"P2_006N", # Total Non-Hispanic Black
"P2_007N", # Total Non-Hispanic Indian
"P2_008N", # Total Non-Hispanic Asian
"P2_009N", # Total Non-Hispanic Islander
"P2_010N", # Total Non-Hispanic Other
"P2_011N"), # Total Non-Hispanic two or more races
year = 2020, geometry = TRUE) %>%
pivot_wider(names_from = variable, values_from = value)
# Manually cache the data to avoid downloading again
saveRDS(blocks, file = "../data/blocks.rds")
} else {
blocks <- read_rds(blocks_file)
}
states <- get_decennial(geography = "state",
variables = c("STATE"),
year = 2020, geometry = TRUE)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data summary file
## Note: 2020 decennial Census data use differential privacy, a technique that
## introduces errors into data to preserve respondent confidentiality.
## i Small counts should be interpreted with caution.
## i See https://www.census.gov/library/fact-sheets/2021/protecting-the-confidentiality-of-the-2020-census-redistricting-data.html for additional guidance.
## This message is displayed once per session.
# Get the total geography of each metro area
metros <- get_decennial(geography = "cbsa",
variables = c("P2_001N"),
year = 2020, geometry = TRUE)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data summary file
metros$NAME <- substr(metros$NAME, 1, nchar(metros$NAME)-11) # The 538 data has "Metro Area" and "Micro Area" removed from NAME
metro_grades <- left_join(metro_grades, metros, by=c("metro_area"="NAME")) # associate each metro area with a geography
metro_grades$centroid <- st_centroid(metro_grades$geometry)
# Filter for the blocks mapped by HOLC
holc_blocks <- inner_join(zone_matches, blocks, by=c("block_geoid20"="GEOID"))
# Select and reformat Pittsburgh
pitt <- holc_blocks %>% filter(holc_city == "Pittsburgh")
surrounding_counts <- blocks_buffer %>%
summarize(white = sum(white),
black = sum(black),
hispanic = sum(hispanic),
asian = sum(asian),
other = sum(other_race))
surrounding_pct <- as.data.frame(t(round(surrounding_counts / sum(surrounding_counts), digits = 3)*100))
colnames(surrounding_pct) <- "Percentage"
surrounding_pct$Race <- c("White", "Black", "Latino", "Asian", "Other")
surrounding_pct$Race <- factor(surrounding_pct$Race, levels=rev(c("White", "Black", "Latino", "Asian", "Other")), ordered = T)
surrounding_pct$tmp <- factor("1", level=c("0","1"))
surrounding_pct$tmp_lab <- factor("1", level=c("0","1"))
surrounding_pct$label_loc <- cumsum(surrounding_pct$Percentage) - surrounding_pct$Percentage / 2
surrounding_pct$label <- paste(surrounding_pct$Race, " - ", surrounding_pct$Percentage, "%", sep="")
p <- ggplot() + geom_bar(data=surrounding_pct, aes(x = Percentage, y = tmp, fill = Race), color="white", lwd=1, width=0.2, stat="identity", show.legend=F) +
geom_text(data=filter(surrounding_pct, Race %in% c("White", "Black")), aes(x = label_loc, y = tmp_lab, label = label), position = position_nudge(x = 0, y = -0.15), size = 4) +
theme_void() +
scale_fill_manual(values = rev(points$color))
surrounding_bar <- ggplotly(p, tooltip = c("Percentage", "Race"), width = 800, height = 200) %>%
layout(
showlegend = F,
legend = list(orientation = 'h'),
yaxis = list(
color = '#ffffff',
gridcolor = '#ffff'),
xaxis = list(
color = '#ffffff',
gridcolor = '#ffff')
)
bar_grade <- function(metro_grades, grade){
holc_pct <- as.data.frame(t(metro_grades %>%
filter(metro_area == "Pittsburgh, PA", holc_grade == grade) %>%
select(pct_white, pct_black, pct_hisp, pct_asian, pct_other)))
colnames(holc_pct) <- "Percentage"
holc_pct$Percentage <- round(holc_pct$Percentage, digits = 1)
holc_pct$Race <- c("White", "Black", "Latino", "Asian", "Other")
holc_pct$Race <- factor(holc_pct$Race, levels=rev(c("White", "Black", "Latino", "Asian", "Other")))
holc_pct$Race2 <- holc_pct$Race # Need this so "Race" isn't duplicated due to grouping on ggplot
holc_pct$tmp <- factor("1", level=c("0","1"))
holc_pct$tmp_lab <- factor("1", level=c("0","1"))
holc_pct$label_loc <- cumsum(holc_pct$Percentage) - holc_pct$Percentage / 2
holc_pct$label <- paste(holc_pct$Race, " - ", holc_pct$Percentage, "%", sep="")
p <- ggplot() + geom_bar(data=holc_pct, aes(x = Percentage, y = tmp, fill = Race, group = Race2), color="white", lwd=1, width=0.2, position="stack", stat="identity", show.legend=F) +
geom_text(data=filter(holc_pct, Percentage > 6), aes(x = label_loc, y = tmp_lab, label = label), position = position_nudge(x = 0, y = -0.2), size = 4) +
theme_void() +
scale_fill_manual(values = rev(points$color))
holc_bar <- ggplotly(p, tooltip = c("Percentage", "Race"), autosize = F, width = 800, height = 200) %>%
layout(
showlegend = F,
legend = list(orientation = 'h'),
yaxis = list(
color = '#ffffff',
gridcolor = '#ffff'),
xaxis = list(
color = '#ffffff',
gridcolor = '#ffff')
)
holc_bar
}
# The grid package allows us to print grobs to the graphics device by manipulating the viewport
# The ggplotify package allows us to turn arbitrary ggplot objects into grobs
# Both of these we use to overlay plots created with geom_sf previously
construct_surrounding_plot <- function(surrounding_area, PA_plot){
grid.newpage()
grid.draw(as.grob(surrounding_area))
# Place Pennsylvania in the correct frame of reference relative to the surrounding area
sample_vp <- viewport(x = 0.6, y = 0.05,
width = 0.25, height = 0.25,
just = c("left", "bottom"),
angle = 15)
pushViewport(sample_vp)
grid.draw(as.grob(PA_plot))
popViewport()
}
# Sample from every map grade
map_grade <- function(df, grade){
samp <- df %>% filter(holc_grade == grade)
print(sprintf("Saving Hispanic %s...", grade))
hispanic_path <- sprintf("../data/sampled_hispanic_%s.rds", grade)
if(!file.exists(hispanic_path)){
samp_hispanic <- samp %>% filter(hispanic > 0)
output <- st_sample(samp_hispanic$geometry2, samp_hispanic$hispanic, type="random", exact=F)
saveRDS(output, file = hispanic_path)
}
points_hispanic <- read_rds(hispanic_path)
print(sprintf("Saving Other %s...", grade))
other_path <- sprintf("../data/sampled_other_%s.rds", grade)
if(!file.exists(other_path)){
samp_other <- samp %>% filter(other_race > 0)
output <- st_sample(samp_other$geometry2, samp_other$other_race, type="random", exact=F)
saveRDS(output, file = other_path)
}
points_other <- read_rds(other_path)
print(sprintf("Saving Asian %s...", grade))
asian_path <- sprintf("../data/sampled_asian_%s.rds", grade)
if(!file.exists(asian_path)){
samp_asian <- samp %>% filter(asian > 0)
output <- st_sample(samp_asian$geometry2, samp_asian$asian, type="random", exact=F)
saveRDS(output, file = asian_path)
}
points_asian <- read_rds(asian_path)
print(sprintf("Saving Black %s...", grade))
black_path <- sprintf("../data/sampled_black_%s.rds", grade)
if(!file.exists(black_path)){
samp_black <- samp %>% filter(black > 0)
output <- st_sample(samp_black$geometry2, samp_black$black, type="random", exact=F)
saveRDS(output, file = black_path)
}
points_black <- read_rds(black_path)
print(sprintf("Saving White %s...", grade))
white_path <- sprintf("../data/sampled_white_%s.rds", grade)
if(!file.exists(white_path)){
samp_white <- samp %>% filter(white > 0)
output <- st_sample(samp_white$geometry2, samp_white$white, type="random", exact=F)
saveRDS(output, file = white_path)
}
points_white <- read_rds(white_path)
}
holc_samp <- inner_join(zone_matches, samp, by=c("block_geoid20"="GEOID"))
for(grade in c("A","B","C","D")){
if(!file.exists(sprintf("../data/sampled_white_%s.rds", grade)) ||
!file.exists(sprintf("../data/sampled_black_%s.rds", grade)) ||
!file.exists(sprintf("../data/sampled_hispanic_%s.rds", grade)) ||
!file.exists(sprintf("../data/sampled_asian_%s.rds", grade)) ||
!file.exists(sprintf("../data/sampled_other_%s.rds", grade))){
map_grade(holc_samp, grade)
}
}
# Construct HOLC plots
holc_plots <- list()
grades <- c(A = "Best", B = "Desirable", C = "Declining", D = "Hazardous")
for(i in c("A","B","C","D")){
points[[i]] <- c(st_combine(read_rds(sprintf("../data/sampled_white_%s.rds", i))),
st_combine(read_rds(sprintf("../data/sampled_black_%s.rds", i))),
st_combine(read_rds(sprintf("../data/sampled_hispanic_%s.rds", i))),
st_combine(read_rds(sprintf("../data/sampled_asian_%s.rds", i))),
st_combine(read_rds(sprintf("../data/sampled_other_%s.rds", i)))
)
holc_plots[[i]] <- ggmap(m) +
geom_sf(data = cutout, mapping = aes(geometry=geometry), color="white", fill="white", inherit.aes = FALSE) +
geom_sf(data = bound, mapping = aes(geometry=geometry), color="black", fill=NA, lwd = 0.5, inherit.aes = FALSE) +
geom_sf(data = points, mapping = aes(geometry=.data[[i]], color=race), size=0.01, inherit.aes = FALSE, show.legend=F) +
labs(title = sprintf("Pittsburgh's %s Zones", grades[[i]])) +
scale_color_manual(values = points$color) +
theme_void() + theme(plot.title=element_text(margin = margin(b=10), hjust = 0.45))
}
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
# Plot cleveland bar plot
cleveland <- metro_grades %>% filter(metro_area == "Cleveland-Elyria, OH") %>% select(holc_grade, pct_white, pct_black, pct_hisp, pct_asian, pct_other) %>% pivot_longer(pct_white:pct_other)
cleveland$holc_grade <- factor(cleveland$holc_grade, levels=rev(c("A","B","C","D")))
cleveland$holc_grade <- recode(cleveland$holc_grade, A="\"Best\"", B="\"Desirable\"", C="\"Declining\"", D="\"Hazardous\"")
cleveland$name <- recode(cleveland$name, pct_white="White", pct_black="Black", pct_hisp="Latino", pct_asian="Asian", pct_other = "Other")
cleveland$name <- factor(cleveland$name, levels=rev(c("White", "Black", "Latino", "Asian", "Other")))
ggplot(cleveland) + geom_bar(aes(x = value, y = holc_grade, fill = name), color="white", lwd=1, width=0.8, stat="identity", show.legend=T) +
theme_void() +
theme(axis.text.y = element_text(),
legend.position="top",
legend.title=element_blank()) +
scale_fill_manual(guide=guide_legend(reverse=T), values = rev(points$color))
### DIFFERENT STYLE FROM ARTICLE ###
metros_A <- metro_grades %>% filter(holc_grade == grade)
# Turn the points into longitude and latitude so we can repel the points on the ggplot
metros_A$lon <- map_dbl(metros_A$centroid, function(x){x[[1]]})
metros_A$lat <- map_dbl(metros_A$centroid, function(x){x[[2]]})
metros_A$pct_nonwhite <- 100 - metros_A$pct_white
metros_A$rad <- (metros_A$value^(1/3)) / 5000
nation_pie <- function(metro_grades, grade){
metros_A <- metro_grades %>% filter(holc_grade == grade)
# Turn the points into longitude and latitude so we can repel the points on the ggplot
metros_A$lon <- map_dbl(metros_A$centroid, function(x){x[[1]]})
metros_A$lat <- map_dbl(metros_A$centroid, function(x){x[[2]]})
metros_A$pct_nonwhite <- 100 - metros_A$pct_white
metros_A$rad <- (metros_A$value^(1/3)) / 200
mas <- metros_A %>% select(lon, lat, pct_white, pct_nonwhite, value, rad) %>% arrange(desc(value))
end <- nrow(mas)
for(i in 1:(nrow(mas)-1)){
cur <- mas[i,]
lon_dist <- cur$lon - mas$lon[(i+1):end]
lat_dist <- cur$lat - mas$lat[(i+1):end]
dists <- sqrt((lon_dist)^2 + (lat_dist)^2)
move <- mas$rad[i] / dists
move[move <= 1] <- 0
mas$lon[(i+1):end] <- mas$lon[(i+1):end] - lon_dist*move
mas$lat[(i+1):end] <- mas$lat[(i+1):end] - lat_dist*move
}
title <- ifelse(grade == "A", "Best", "Hazardous")
ggplot() +
ggtitle(title) +
geom_sf(data=filter(states, !(NAME %in% c("Alaska", "Hawaii", "Puerto Rico"))) , mapping = aes(geometry = geometry)) +
#geom_jitter(data=metros_A, mapping = aes(x = lon, y = lat)) +
geom_scatterpie(data=mas,
mapping = aes(x = lon, y = lat, r = rad),
cols=c("pct_white", "pct_nonwhite"),
color = "white", show.legend=F) +
scale_fill_manual(values = c("#ffb262","#808285")) +
theme_void() +
theme(plot.title = element_text(hjust = 0.5))
}
A <- nation_pie(metro_grades, "A")
D <- nation_pie(metro_grades, "D")
grid.arrange(A, D, nrow=1)